#
#===============================================================================
#
#         FILE:  PhastCons.pm
#
#  DESCRIPTION:  Package to handle Position weight matrices
#         BUGS:  ---
#        NOTES:  The package is spun off from the AnnotationIO.pm
#       AUTHOR:  Chaolin Zhang (cz), czhang@rockefeller.edu
#      COMPANY:  Rockefeller University
#      VERSION:  1.0
#      CREATED:  12/16/10 20:58:53
#     REVISION:  ---
#===============================================================================


package PhastCons;

require Exporter;

our $VERSION = 1.01;

@ISA = qw (Exporter);

@EXPORT = qw (
	indexBigPhastConsFile
	readBigPhastConsFile
	readPhastConsIndexFile

);

=head1 NAME

PhastCons - read and write bio annotation files

=cut

use strict;
use warnings;

use Data::Dumper;
use Carp;

use Common;


my $debug = 0;


=head2 indexBigPhastConsFile


index PhastCons file
the pointer indicate the position of the first line of each block

#->chromStart
#->chromEnd
#->chrom
#->pointer

my $indices = indexBigPhastConsFile ($inFile)

=cut

sub indexBigPhastConsFile
{
	my $in = $_[0];
	my @ret;
	my $fin;

	if ($in=~/\.gz$/)
	{
		open ($fin, "gunzip -c $in | ") ||Carp::croak "cannot open file $in to read\n";
	}
	else
	{
		open ($fin, "<$in")|| Carp::croak "can not open file $in to read\n";
	}

	my $entry = 0;
	my $n = 0;
	while (my $line=<$fin>)
	{
		chomp $line;
		next if $line=~/^\s*$/;
		if ($line=~/^fixedStep\schrom=(.*?)\sstart=(\d+)\sstep=(\d+)$/)
		{
			if ($entry)
			{
				$entry->{"chromEnd"} = $entry->{"chromStart"} + $n - 1;
				push @ret, $entry;
			}
			$entry = {chrom=>$1, chromStart=>$2, step=>$3, pointer=>tell($fin)};
			$n = 0;
		}
		else
		{
			next unless $entry;
			$n++;
		}
	}
	close ($fin);

	if ($entry)
	{
		$entry->{"chromEnd"} = $entry->{"chromStart"} + $n - 1;
		push @ret, $entry;
	}
	return {file=>Common::getFullPath($in), index=>\@ret};
}

=head2 readPhastConsIndexFile

Notes: original name: readPhastconsIndexFile

Index phastcons file for efficient accessing

my $indices = readPhastConsIndexFile ($inFile);

Status: tested


=cut

sub readPhastConsIndexFile
{
	my $in = $_[0];
	my $fin;
	open ($fin, "<$in") || Carp::croak "can not open file $in to read\n";
	my $line = <$fin>;
	chomp $line;
	my ($tmp, $file) = split ("=", $line);
	my @ret;
	while ($line =<$fin>)
	{
		chomp $line;
		next if $line=~/^\s*$/;
		my ($chrom, $chromStart, $chromEnd, $step, $pointer) = split ("\t", $line);
		push @ret, {chrom=>$chrom, chromStart=>$chromStart, chromEnd=>$chromEnd, step=>$step, pointer=>$pointer};
	}
	close ($fin);
	return {file=>$file, index=>\@ret};
}

=head2 readBigPhastConsFile

read phastcons between $chromStart and $chromEnd from a block specified by $blockInfo


my $info = readBigPhastConsFile ($inFile, $blockInfo, $chromStart, $chromEnd);


blockInfo{chrom=>, chromStart=>, chromEnd=>, step=>, pointer=>
 	generated by readPhastConsIndexFile (above)

scores in the region specified by $chromStart and $chromEnd will be returned

return: 
{
	chrom=>$blockInfo->{"chrom"}, 
	chromStart=>$start, 
	chromEnd=>$end, 
	step=>$step, 
	scores=>\@ret
}

Status: tested

=cut

sub readBigPhastConsFile
{
	my ($in, $blockInfo, $chromStart, $chromEnd) = @_;
	my $blockChromStart = $blockInfo->{"chromStart"} - 1;
	my $blockChromEnd = $blockInfo->{"chromEnd"} - 1;
	#fix 05/08/2011
	#Note wiggle files are 1-based coordinates, so we convert it into 0-based coordinates
	#to be consistent with the bed format
	

	my $step = $blockInfo->{"step"};
	my @ret;
	
	return 0 if ($blockChromStart > $chromEnd || $chromStart > $blockChromEnd);	#no overlap
	
	my $start = ($blockChromStart >= $chromStart) ? $blockChromStart : $chromStart;
	my $end = ($blockChromEnd <= $chromEnd) ? $blockChromEnd : $chromEnd;

	my $fin;
	open ($fin, "<$in") || Carp::croak "can not open file $in to read\n";
	seek ($fin, $blockInfo->{"pointer"}, 0);	#go to the first line of the block

	for (my $i = $blockChromStart; $i <= $blockChromEnd; $i+= $step)
	{
		my $line = <$fin>;
		chomp $line;
		if ($i >= $start && $i <= $end)
		{
			push @ret, $line;
		}
		elsif ($i > $end)
		{
			last;
		}
	}
	close ($fin);
	
	return {chrom=>$blockInfo->{"chrom"}, chromStart=>$start, chromEnd=>$end, step=>$step, scores=>\@ret};
}



1;


